In this project, we will be working on the New Delhi dataset. New Delhi is the capital of India and has a population of about 2.18 crore (21.8 million). It has a diversity of religions, and a lot of people from different parts of the country come here for work. It is a central hub of various business and political work. Although the names Delhi and New Delhi are often used interchangeably to refer to the National Capital Territory of Delhi (NCT), these are two distinct entities, with New Delhi forming a small part of Delhi. The National Capital Region is a much larger entity comprising the entire NCT along with adjoining districts in neighboring states.
There are various types of restaurants, and the city is famous for its variety of food.
In this project, we will visualize hot spots and various neighboring restaurants of Delhi.
To do this project, we will use the following data sets.
New Delhi Restaurants data that contains a list of Locality, Restaurant name, and Rating along with their latitude and longitude.
The link to the data is : https://www.kaggle.com/shrutimehta/zomato-restaurants-data
Nearby places in each locality of New Delhi city.
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3
import numpy as np
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge folium=0.5.0 --yes ]
import folium
#! pip install geocoder
import geocoder
def __iter__(self):
    """Stand-in ``__iter__`` that is grafted onto the storage body below; it returns 0."""
    return 0


# @hidden_cell
# This cell reads a file from IBM Cloud Object Storage and embeds the
# credentials used to do so. Remove them before sharing the notebook.
client_afe55041fc984c29ae3da0f99b4efe8b = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id='xxxxxxxxxxxxxxxxx',
    ibm_auth_endpoint="xxxxxxxxxxxxxxxxxxxx",
    config=Config(signature_version='oauth'),
    endpoint_url='xxxxxxxxxxxxxxxxxxxxxxx',
)

# Fetch the stored CSV object and keep only its streaming body.
storage_object = client_afe55041fc984c29ae3da0f99b4efe8b.get_object(
    Bucket='xxxxxxxxxxxxxxxxxxxxxxxxxxxxx',
    Key='Delhi_Data.csv',
)
body = storage_object['Body']

# pandas needs an __iter__ attribute to accept `body` as a file-like object,
# so attach the stand-in defined above when the body does not provide one.
if not hasattr(body, "__iter__"):
    body.__iter__ = types.MethodType(__iter__, body)

# To load an Excel file instead, swap `read_csv` for `read_excel` here.
rawdf = pd.read_csv(body, encoding='ISO-8859-1')
rawdf.head()
# Keep only the rows whose City is exactly 'New Delhi'.
df = rawdf.loc[rawdf['City'] == 'New Delhi']
df.head()

# Columns carried forward into the restaurant analysis.
restaurant_columns = [
    'Restaurant Name',
    'Locality',
    'Longitude',
    'Latitude',
    'Cuisines',
    'Aggregate rating',
    'Rating text',
    'Votes',
]

# Discard rows with a zero longitude or a zero aggregate rating
# (presumably placeholders for missing values -- confirm against the data).
has_location_and_rating = (df['Longitude'] != 0.000000) & (df['Aggregate rating'] != 0.0)

# .copy() gives rdf its own data, so columns added to it later do not write
# into a slice of df (which triggers pandas' SettingWithCopyWarning).
rdf = df.loc[has_location_and_rating, restaurant_columns].copy()
rdf.head()
# Base map for the restaurant markers, using a fixed centre and zoom level.
New_Delhi_Rest = folium.Map(location=[28.52, 77.25], zoom_start=12)

df_Res = rdf

# One fill colour per cluster. Deliberately not named `colors`, so that the
# `matplotlib.colors` module alias imported at the top of the file stays usable.
cluster_colors = ['red', 'green', 'blue', 'yellow', 'orange']

# Cluster the restaurants on their (latitude, longitude) pairs.
X = df_Res['Latitude']
Y = df_Res['Longitude']
Z = np.stack((X, Y), axis=1)

# The cluster count follows the palette size, so every label has a colour.
kmeans = KMeans(n_clusters=len(cluster_colors), random_state=0).fit(Z)
clusters = kmeans.labels_
df_Res['Cluster'] = clusters

# NOTE: `latitude` and `longitude` stay bound after this loop; the names are
# kept unchanged because code further down the file may read them.
for latitude, longitude, Locality, cluster in zip(
        df_Res['Latitude'], df_Res['Longitude'], df_Res['Locality'], df_Res['Cluster']):
    label = folium.Popup(Locality, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=cluster_colors[cluster],
        fill_opacity=0.7,
    ).add_to(New_Delhi_Rest)

New_Delhi_Rest
# Build one summary row per Locality from the restaurant-level data.
by_locality = df_Res.groupby('Locality')

# Number of restaurants and mean aggregate rating per locality.
df_Res_Loc = by_locality['Restaurant Name'].count().to_frame()
df_Res_rating = by_locality['Aggregate rating'].mean().to_frame()

# All cuisine strings of a locality joined into one comma-separated string.
# NOTE: ', '.join raises TypeError if a Cuisines value is missing (NaN).
d_Cuisines = by_locality['Cuisines'].agg(', '.join).reset_index()

# Distinct rating texts per locality, joined into one string. `.apply` is used
# for the element-wise join: Series.agg with an element-wise callable is
# deprecated in recent pandas releases.
d_R = by_locality['Rating text'].unique().apply(', '.join).reset_index()

d_V = by_locality['Votes'].sum().to_frame()

# Select the column before averaging: averaging the whole frame first also
# tries to average the text columns, which raises in pandas >= 2.0.
d_Lat = by_locality['Latitude'].mean().to_frame()
d_Lng = by_locality['Longitude'].mean().to_frame()

df_final = (
    pd.merge(d_Lat, d_Lng, on='Locality')
    .merge(df_Res_Loc, on='Locality')
    .merge(d_Cuisines, on='Locality')
    .merge(df_Res_rating, on='Locality')
    .merge(d_R, on='Locality')
    .merge(d_V, on='Locality')
)
df_final = df_final[df_final['Aggregate rating'] != 0.000000]

# Rename by label rather than by position, so a change in the merged column
# order cannot silently attach the wrong name to a column.
df_final = df_final.rename(columns={
    'Latitude': 'Lat',
    'Longitude': 'Lng',
    'Restaurant Name': 'No_of_Restaurant',
    'Cuisines': 'Cusines',
    'Aggregate rating': 'Agg_Rating',
    'Rating text': 'Comments',
    'Votes': 'No_of_Votes',
})
df_final.head()
import os

# Foursquare API credentials, sent as query parameters by getNearbyVenues.
# They are read from the environment so real secrets need not be committed;
# the original placeholder values remain as fallbacks when the variables are unset.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

# Value sent as the `v` query parameter of the Foursquare request.
VERSION = 'xxxxxxxx'
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100, timeout=10):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Args:
        names: Iterable of locality names, one per location.
        latitudes: Iterable of latitudes, parallel to ``names``.
        longitudes: Iterable of longitudes, parallel to ``names``.
        radius: Value sent as the ``radius`` query parameter.
        LIMIT: Value sent as the ``limit`` query parameter.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame with one row per returned venue and the columns
        'Locality', 'Locality Latitude', 'Locality Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. When no
        locations are given (or no venues come back) the frame is empty but
        still has these columns.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    venue_columns = [
        'Locality',
        'Locality Latitude',
        'Locality Longitude',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category',
    ]
    url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        url = url_template.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # A timeout keeps one stalled request from hanging the whole run, and
        # raise_for_status reports API errors directly instead of as a
        # KeyError on the missing payload below.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for v in results:
            venue = v['venue']
            # A venue may come back without categories; record None for it
            # rather than failing on categories[0].
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact
    # even when `rows` is empty.
    return pd.DataFrame(rows, columns=venue_columns)
# Look up the venues around every locality centre stored in df_final.
new_Delhi_venues = getNearbyVenues(
    df_final['Locality'],
    df_final['Lat'],
    df_final['Lng'],
)
new_Delhi_venues.head()
# One-hot encode the venue categories; the empty prefix and separator leave
# each category name as the column name.
new_Delhi_onehot = pd.get_dummies(new_Delhi_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the locality of each venue row.
new_Delhi_onehot['Locality'] = new_Delhi_venues['Locality']

# Reorder the columns so that 'Locality' comes first.
column_list = new_Delhi_onehot.columns.tolist()
column_number = column_list.index('Locality')
column_list.insert(0, column_list.pop(column_number))
new_Delhi_onehot = new_Delhi_onehot[column_list]

# Mean of each indicator column per locality.
New_Delhi_grouped = new_Delhi_onehot.groupby('Locality').mean().reset_index()
New_Delhi_grouped.head()
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (the caller passes rows whose
    first column is 'Locality'); the remaining values are ranked from
    largest to smallest.

    Args:
        row: pandas Series whose first element is a label to ignore and whose
            remaining elements are numeric values indexed by category name.
        num_top_venues: Maximum number of labels to return.

    Returns:
        A NumPy array with up to ``num_top_venues`` index labels, ordered by
        descending value. Equal values keep their original column order.
    """
    row_categories = row.iloc[1:]

    # Rows sliced from a mixed-type frame are object dtype; convert once so
    # the ranking is done on plain floats.
    frequencies = np.asarray(row_categories, dtype=float)

    # A stable sort on the negated values ranks in descending order while
    # keeping ties in column order, so the result is reproducible.
    order = np.argsort(-frequencies, kind='stable')

    return row_categories.index.values[order[:num_top_venues]]
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank uses 'th'.
indicators = ['st', 'nd', 'rd']

# Column headers: 'Locality' followed by one column per rank.
columns = ['Locality']
for ind in range(num_top_venues):
    # Explicit bounds check instead of a bare `except:` around the list
    # lookup, which would also have hidden unrelated errors.
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# One row per locality, filled with its top venue categories.
Locality_venues_sorted = pd.DataFrame(columns=columns)
Locality_venues_sorted['Locality'] = New_Delhi_grouped['Locality']

for ind in range(New_Delhi_grouped.shape[0]):
    Locality_venues_sorted.iloc[ind, 1:] = return_most_common_venues(
        New_Delhi_grouped.iloc[ind, :], num_top_venues)

Locality_venues_sorted.head()
kclusters = 5

# Feature matrix: the per-locality category frequencies without the label.
# The keyword form is used because the positional axis argument of
# DataFrame.drop is no longer accepted by pandas >= 2.0.
New_Delhi_clustering = New_Delhi_grouped.drop(columns='Locality')

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(New_Delhi_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]
kmeans.labels_.shape

# Pair every label with the locality it was computed for. kmeans.labels_ is
# in the row order of New_Delhi_grouped, so the pairing is done there.
cluster_labels = New_Delhi_grouped[['Locality']].copy()
cluster_labels['Cluster Labels'] = kmeans.labels_

# Attach the labels to df_final by Locality instead of by row position with a
# hard-coded row count: a locality present in df_final but absent from
# New_Delhi_grouped would otherwise shift every following label.
# The inner merge keeps df_final's row order and yields a fresh default index.
New_Delhi_merged = df_final.merge(cluster_labels, on='Locality', how='inner')

# add the ranked venue categories for each Locality
New_Delhi_merged = New_Delhi_merged.join(Locality_venues_sorted.set_index('Locality'), on='Locality')
New_Delhi_merged.head()
# Centre the map on the mean position of the localities being plotted,
# rather than on whatever coordinates an earlier loop happened to leave
# behind in `latitude` / `longitude`.
map_center = [New_Delhi_merged['Lat'].mean(), New_Delhi_merged['Lng'].mean()]
map_clusters = folium.Map(location=map_center, zoom_start=10)

# One fill colour per cluster label. Deliberately not named `colors`, so the
# `matplotlib.colors` module alias imported at the top of the file stays usable.
cluster_colors = ['red', 'green', 'blue', 'yellow', 'orange']

# One circle per locality, coloured by its cluster label.
for lat, lon, poi, cluster in zip(
        New_Delhi_merged['Lat'],
        New_Delhi_merged['Lng'],
        New_Delhi_merged['Locality'],
        New_Delhi_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=cluster_colors[cluster],
        fill_opacity=0.7,
    ).add_to(map_clusters)

map_clusters
# Columns shown for each cluster: the locality name (position 0) followed by
# everything from position 5 onwards. Position 0 replaces the previous
# position 1, which is the 'Lat' column in this frame and left the tables
# without the locality name.
cluster_summary_columns = New_Delhi_merged.columns[[0] + list(range(5, New_Delhi_merged.shape[1]))]

# First rows of each of the five clusters.
New_Delhi_merged.loc[New_Delhi_merged['Cluster Labels'] == 0, cluster_summary_columns].head()
New_Delhi_merged.loc[New_Delhi_merged['Cluster Labels'] == 1, cluster_summary_columns].head()
New_Delhi_merged.loc[New_Delhi_merged['Cluster Labels'] == 2, cluster_summary_columns].head()
New_Delhi_merged.loc[New_Delhi_merged['Cluster Labels'] == 3, cluster_summary_columns].head()
New_Delhi_merged.loc[New_Delhi_merged['Cluster Labels'] == 4, cluster_summary_columns].head()